Project_V1

Code
suppressPackageStartupMessages(library(readxl))
suppressPackageStartupMessages(library(tidyverse))
suppressPackageStartupMessages(library(plotly))

# Load the pooled study sheet and keep only the columns used in this analysis:
# participant ID, the 16 baseline (BSSQ) and 16 active (ASSQ) symptom scores,
# age, prior VR experience and ssq_modelled.
data <- read_excel("Project_1_Data.xlsx", sheet = "pooled123")

filteredData <- data %>%
  select(PID, BSSQ_1:BSSQ_16, ASSQ_1:ASSQ_16, age, VRexperience, ssq_modelled)

# Subsets of participants with and without prior VR experience.
# NOTE(review): these are taken before the d_* and age_group columns are
# added further down, so they will not contain them -- confirm this is intended.
withVRexperience <- filteredData %>% filter(VRexperience == "Yes")
noVRexperience <- filteredData %>% filter(VRexperience == "No")

# Calculate the change (active minus baseline) in SSQ score for each of the
# 16 symptoms and store it in new columns d_1 ... d_16.
# A single loop over the symptom index replaces 16 copy-pasted mutate() calls,
# so the formula lives in one place and cannot drift between symptoms.
for (symptom in seq_len(16)) {
  filteredData[[paste0("d_", symptom)]] <-
    filteredData[[paste0("ASSQ_", symptom)]] -
    filteredData[[paste0("BSSQ_", symptom)]]
}

# Reclass VR experience as a factor (it was read in as chr)
filteredData <- filteredData %>%
  mutate(VRexperience = as.factor(VRexperience))

# Split participants into age groups so that the mean change for each symptom
# can later be calculated per group.
# Each band's upper bound is written as "< start of the next band" so that a
# non-integer age (e.g. 21.5) cannot fall between two bands and silently
# become NA. For whole-number ages the grouping is unchanged.
# Ages below 16 and missing ages match no condition and are left as NA.
filteredData <- mutate(filteredData, age_group = case_when(
  age >= 16 & age < 22 ~ "16 to 21",
  age >= 22 & age < 30 ~ "22 to 29",
  age >= 30 & age < 38 ~ "30 to 37",
  age >= 38 & age <= 45 ~ "38 to 45",
  age > 45 ~ "above 45"
))

# Store the age group labels as a factor, then inspect the resulting structure
filteredData <- filteredData %>%
  mutate(age_group = as.factor(age_group))
str(filteredData)
tibble [336 × 53] (S3: tbl_df/tbl/data.frame)
 $ PID         : num [1:336] 1201 1351 2351 1151 2201 ...
 $ BSSQ_1      : num [1:336] 0 4 1 0 0 2 0 0 2 1 ...
 $ BSSQ_2      : num [1:336] 0 2 7 0 0 5 0 1 2 0 ...
 $ BSSQ_3      : num [1:336] 0 0 0 0 0 1 0 1 0 1 ...
 $ BSSQ_4      : num [1:336] 0 1 0 0 0 0 2 0 0 1 ...
 $ BSSQ_5      : num [1:336] 0 4 2 0 0 4 1 0 0 1 ...
 $ BSSQ_6      : num [1:336] 0 0 0 0 0 0 0 0 0 0 ...
 $ BSSQ_7      : num [1:336] 0 0 0 0 0 0 0 0 1 0 ...
 $ BSSQ_8      : num [1:336] 0 0 0 0 0 0 0 0 0 0 ...
 $ BSSQ_9      : num [1:336] 0 4 1 0 0 2 1 0 0 1 ...
 $ BSSQ_10     : num [1:336] 0 0 0 0 0 0 0 0 0 1 ...
 $ BSSQ_11     : num [1:336] 0 0 0 0 0 0 0 0 0 2 ...
 $ BSSQ_12     : num [1:336] 0 0 0 0 0 0 0 0 0 1 ...
 $ BSSQ_13     : num [1:336] 0 0 0 0 0 0 0 0 0 0 ...
 $ BSSQ_14     : num [1:336] 0 0 0 0 0 0 0 0 0 0 ...
 $ BSSQ_15     : num [1:336] 0 3 1 0 0 1 0 0 0 1 ...
 $ BSSQ_16     : num [1:336] 0 0 0 0 0 0 0 0 1 0 ...
 $ ASSQ_1      : num [1:336] 5 3 0 0 0 5 1 5 7 2 ...
 $ ASSQ_2      : num [1:336] 2 1 5 0 0 1 0 2 3 0 ...
 $ ASSQ_3      : num [1:336] 0 0 0 0 0 1 1 2 1 2 ...
 $ ASSQ_4      : num [1:336] 2 0 1 0 0 2 0 1 2 3 ...
 $ ASSQ_5      : num [1:336] 2 3 1 0 0 0 0 3 4 0 ...
 $ ASSQ_6      : num [1:336] 2 0 0 0 0 0 0 0 1 0 ...
 $ ASSQ_7      : num [1:336] 0 0 0 0 0 0 0 0 3 1 ...
 $ ASSQ_8      : num [1:336] 3 0 0 1 0 3 0 1 6 1 ...
 $ ASSQ_9      : num [1:336] 3 3 0 0 0 0 0 1 3 0 ...
 $ ASSQ_10     : num [1:336] 4 0 3 0 0 0 1 4 5 0 ...
 $ ASSQ_11     : num [1:336] 3 0 1 0 0 0 0 1 0 3 ...
 $ ASSQ_12     : num [1:336] 2 0 0 0 0 0 0 4 3 2 ...
 $ ASSQ_13     : num [1:336] 4 0 0 0 0 1 0 6 3 3 ...
 $ ASSQ_14     : num [1:336] 3 0 0 0 0 0 0 2 6 0 ...
 $ ASSQ_15     : num [1:336] 3 3 0 2 0 3 0 0 4 0 ...
 $ ASSQ_16     : num [1:336] 0 0 0 0 0 0 0 0 1 0 ...
 $ age         : num [1:336] 30 37 28 39 37 18 28 38 26 36 ...
 $ VRexperience: Factor w/ 2 levels "No","Yes": 2 1 1 2 2 2 2 2 2 2 ...
 $ ssq_modelled: num [1:336] 11 -1 -2 3 0 8 1 6 15 1 ...
 $ d_1         : num [1:336] 5 -1 -1 0 0 3 1 5 5 1 ...
 $ d_2         : num [1:336] 2 -1 -2 0 0 -4 0 1 1 0 ...
 $ d_3         : num [1:336] 0 0 0 0 0 0 1 1 1 1 ...
 $ d_4         : num [1:336] 2 -1 1 0 0 2 -2 1 2 2 ...
 $ d_5         : num [1:336] 2 -1 -1 0 0 -4 -1 3 4 -1 ...
 $ d_6         : num [1:336] 2 0 0 0 0 0 0 0 1 0 ...
 $ d_7         : num [1:336] 0 0 0 0 0 0 0 0 2 1 ...
 $ d_8         : num [1:336] 3 0 0 1 0 3 0 1 6 1 ...
 $ d_9         : num [1:336] 3 -1 -1 0 0 -2 -1 1 3 -1 ...
 $ d_10        : num [1:336] 4 0 3 0 0 0 1 4 5 -1 ...
 $ d_11        : num [1:336] 3 0 1 0 0 0 0 1 0 1 ...
 $ d_12        : num [1:336] 2 0 0 0 0 0 0 4 3 1 ...
 $ d_13        : num [1:336] 4 0 0 0 0 1 0 6 3 3 ...
 $ d_14        : num [1:336] 3 0 0 0 0 0 0 2 6 0 ...
 $ d_15        : num [1:336] 3 0 -1 2 0 2 0 0 4 -1 ...
 $ d_16        : num [1:336] 0 0 0 0 0 0 0 0 0 0 ...
 $ age_group   : Factor w/ 5 levels "16 to 21","22 to 29",..: 3 3 2 4 3 1 2 4 2 3 ...

Initial Data Analysis (IDA)

Source

Our data was sourced from Cosette Saunders’ PhD and honours thesis paper “Socially Acquired Nocebo Effects Generalize but Are Not Attenuated by Choice”. (ask about how much context we need to provide here)

Structure

The data contained 336 records of participants in the study, each with 51 variables. In particular, our research focused on the following variables:

  • Baseline SSQ of 16 symptoms (quantitative, discrete): self-reported symptom severity of participants before undergoing VR, on a scale of 0 to 10.

  • Active SSQ of 16 symptoms (quantitative, discrete): self-reported symptom severity of participants after undergoing VR, on a scale of 0 to 10.

  • The age of the participants (quantitative, discrete); they were then sorted into age groups – reclassed as ‘factor’ (qualitative, ordinal)

    • This was so that we could qualitatively class different ages and see their symptoms
  • Whether the participant has had previous VR experience (qualitative, nominal); this was reclassified from ‘character’ into ‘factor’.

    • R read this variable in as ‘chr’ (character); it should be treated as a qualitative variable, so it was converted to a factor
  • The change between baseline and active SSQ (active minus baseline) was calculated for each participant and each symptom (quantitative, discrete).

Code
# Print the prepared tibble to show its columns and first rows
filteredData
# A tibble: 336 × 53
     PID BSSQ_1 BSSQ_2 BSSQ_3 BSSQ_4 BSSQ_5 BSSQ_6 BSSQ_7 BSSQ_8 BSSQ_9 BSSQ_10
   <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>   <dbl>
 1  1201      0      0      0      0      0      0      0      0      0       0
 2  1351      4      2      0      1      4      0      0      0      4       0
 3  2351      1      7      0      0      2      0      0      0      1       0
 4  1151      0      0      0      0      0      0      0      0      0       0
 5  2201      0      0      0      0      0      0      0      0      0       0
 6  1251      2      5      1      0      4      0      0      0      2       0
 7  1101      0      0      0      2      1      0      0      0      1       0
 8  2101      0      1      1      0      0      0      0      0      0       0
 9  2251      2      2      0      0      0      0      1      0      0       0
10  1102      1      0      1      1      1      0      0      0      1       1
# ℹ 326 more rows
# ℹ 42 more variables: BSSQ_11 <dbl>, BSSQ_12 <dbl>, BSSQ_13 <dbl>,
#   BSSQ_14 <dbl>, BSSQ_15 <dbl>, BSSQ_16 <dbl>, ASSQ_1 <dbl>, ASSQ_2 <dbl>,
#   ASSQ_3 <dbl>, ASSQ_4 <dbl>, ASSQ_5 <dbl>, ASSQ_6 <dbl>, ASSQ_7 <dbl>,
#   ASSQ_8 <dbl>, ASSQ_9 <dbl>, ASSQ_10 <dbl>, ASSQ_11 <dbl>, ASSQ_12 <dbl>,
#   ASSQ_13 <dbl>, ASSQ_14 <dbl>, ASSQ_15 <dbl>, ASSQ_16 <dbl>, age <dbl>,
#   VRexperience <fct>, ssq_modelled <dbl>, d_1 <dbl>, d_2 <dbl>, d_3 <dbl>, …
Code
library(RColorBrewer)

# Count participants in each VR experience category
data_experience <- filteredData %>% select(VRexperience)
exp_counted <- count(data_experience, VRexperience)

# Pie chart of the share of participants with / without prior VR experience
vr_pie <- exp_counted %>%
  plot_ly(labels = ~VRexperience, values = ~n, type = "pie") %>%
  layout(title = "Distribution of VR experience", showlegend = TRUE)

vr_pie
Code
# Count participants in each age group
data_age_groups <- filteredData %>% select(age_group)
groups_counted <- count(data_age_groups, age_group)

# Pie chart of the share of participants in each age group
age_pie <- groups_counted %>%
  plot_ly(labels = ~age_group, values = ~n, type = "pie") %>%
  layout(title = "Distribution of ages", showlegend = TRUE)
age_pie

Limitations

Assumptions